{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "02161.ipynb",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/02161.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GWnQ2gTgIgIn",
        "colab_type": "text"
      },
      "source": [
        "## 5.12 稠密连接网络 ( DenseNet )"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "kMdVvP5DIwHq",
        "colab_type": "text"
      },
      "source": [
        "### 5.12.1 稠密块"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KQjf8_wvI0O6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import torch \n",
        "from torch import nn, optim \n",
        "import torch.nn.functional as F \n",
        "import d2l \n",
        "import time \n",
        "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
        "\n",
        "\n",
        "def conv_block(in_channels, out_channels):\n",
        "    blk = nn.Sequential(nn.BatchNorm2d(in_channels), \n",
        "            nn.ReLU(), \n",
        "            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))\n",
        "    return blk "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Us959MDJKDCT",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class DenseBlock(nn.Module):\n",
        "    def __init__(self, num_convs, in_channels, out_channels):\n",
        "        super(DenseBlock, self).__init__()\n",
        "        net = []\n",
        "        for i in range(num_convs):\n",
        "            in_c = in_channels + i * out_channels \n",
        "            net.append(conv_block(in_c, out_channels))\n",
        "        self.net = nn.ModuleList(net)\n",
        "        self.out_channels = in_channels + num_convs * out_channels \n",
        "\n",
        "    def forward(self, X):\n",
        "        for blk in self.net:\n",
        "            Y = blk(X)\n",
        "            X = torch.cat((X, Y), dim=1)\n",
        "        return X"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dspDRZ3WK_EW",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "e85f386e-6004-47bc-e7bf-5cb42285e69a"
      },
      "source": [
        "blk = DenseBlock(2, 3, 10)\n",
        "X = torch.rand(4, 3, 8, 8)\n",
        "Y = blk(X)\n",
        "Y.shape "
      ],
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([4, 23, 8, 8])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 30
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "E1cJY8KGLIcG",
        "colab_type": "text"
      },
      "source": [
        "### 5.12.2 过渡层"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MiT6veViLel6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def transition_block(in_channels, out_channels):\n",
        "    blk = nn.Sequential(\n",
        "        nn.BatchNorm2d(in_channels), \n",
        "        nn.ReLU(), \n",
        "        nn.Conv2d(in_channels, out_channels, kernel_size=1), \n",
        "        nn.AvgPool2d(kernel_size=2, stride=2)\n",
        "    )\n",
        "    return blk "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Y_-WOrJXL1VZ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "f0376c7d-3075-4881-bed8-aad3eb849152"
      },
      "source": [
        "blk = transition_block(23, 10)\n",
        "blk(Y).shape "
      ],
      "execution_count": 32,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "torch.Size([4, 10, 4, 4])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 32
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "S8Dd_tSKMB6d",
        "colab_type": "text"
      },
      "source": [
        "### 5.12.3 DenseNet 模型"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BAETI-UyMKlC",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "net = nn.Sequential(\n",
        "    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3), \n",
        "    nn.BatchNorm2d(64), \n",
        "    nn.ReLU(), \n",
        "    nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PstRoN-SMb0Y",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "num_channels, growth_rate = 64, 32 \n",
        "num_convs_in_dense_blocks = [4, 4, 4, 4]\n",
        "\n",
        "for i, num_convs in enumerate(num_convs_in_dense_blocks):\n",
        "    DB = DenseBlock(num_convs, num_channels, growth_rate)\n",
        "    net.add_module('DenseBlock_%d' % i, DB)\n",
        "    num_channels = DB.out_channels \n",
        "    if i != len(num_convs_in_dense_blocks) - 1:\n",
        "        net.add_module('transition_block_%d' % i, transition_block(num_channels, num_channels // 2))\n",
        "        num_channels = num_channels // 2 "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HSQTfgMXNWr9",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "net.add_module('BN', nn.BatchNorm2d(num_channels))\n",
        "net.add_module('relu', nn.ReLU())\n",
        "net.add_module('global_avg_pool', d2l.GlobalAvgPool2d())\n",
        "net.add_module('fc', nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10)))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "X7bl3wk-N7bG",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 269
        },
        "outputId": "006209d2-e400-4cd2-e163-719bd48f5720"
      },
      "source": [
        "X = torch.rand((1, 1, 96, 96))\n",
        "for name, layer in net.named_children():\n",
        "    X = layer(X)\n",
        "    print(name, 'output shape:\\t', X.shape)"
      ],
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0 output shape:\t torch.Size([1, 64, 48, 48])\n",
            "1 output shape:\t torch.Size([1, 64, 48, 48])\n",
            "2 output shape:\t torch.Size([1, 64, 48, 48])\n",
            "3 output shape:\t torch.Size([1, 64, 24, 24])\n",
            "DenseBlock_0 output shape:\t torch.Size([1, 192, 24, 24])\n",
            "transition_block_0 output shape:\t torch.Size([1, 96, 12, 12])\n",
            "DenseBlock_1 output shape:\t torch.Size([1, 224, 12, 12])\n",
            "transition_block_1 output shape:\t torch.Size([1, 112, 6, 6])\n",
            "DenseBlock_2 output shape:\t torch.Size([1, 240, 6, 6])\n",
            "transition_block_2 output shape:\t torch.Size([1, 120, 3, 3])\n",
            "DenseBlock_3 output shape:\t torch.Size([1, 248, 3, 3])\n",
            "BN output shape:\t torch.Size([1, 248, 3, 3])\n",
            "relu output shape:\t torch.Size([1, 248, 3, 3])\n",
            "global_avg_pool output shape:\t torch.Size([1, 248, 1, 1])\n",
            "fc output shape:\t torch.Size([1, 10])\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B9wbUQOJOJxg",
        "colab_type": "text"
      },
      "source": [
        "### 5.12.4 获取数据并训练模型"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "DB30PSfROTt2",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 118
        },
        "outputId": "9be7b237-c89b-40dd-e619-3c0df79ddc09"
      },
      "source": [
        "batch_size = 256 \n",
        "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)\n",
        "\n",
        "lr, num_epochs = 0.001, 5 \n",
        "optimizer = torch.optim.Adam(net.parameters(), lr=lr)\n",
        "d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)"
      ],
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "training on  cuda\n",
            "epoch 1, loss 0.4460, train acc 0.843, test acc 0.811, time 21.5 sec\n",
            "epoch 2, loss 0.2681, train acc 0.902, test acc 0.882, time 21.3 sec\n",
            "epoch 3, loss 0.2302, train acc 0.916, test acc 0.906, time 21.3 sec\n",
            "epoch 4, loss 0.2099, train acc 0.922, test acc 0.911, time 21.3 sec\n",
            "epoch 5, loss 0.1887, train acc 0.930, test acc 0.901, time 21.4 sec\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}